import re
# Scan cnBeta.html line by line and print, for every line that contains at
# least one match, the list of http(s) URLs found on that line.
#
# The pattern captures the shortest run that starts with "http://" or
# "https://" and stops just before the next terminator character.
# NOTE(review): inside the character class ["|\'] the `|` is a literal, not
# alternation, so a match also ends at a `|` character -- confirm whether
# that is intended before changing the pattern.
pattern = re.compile(r'(https?://.+?)["|\']')

# The context manager guarantees the file handle is closed, even if the
# scan raises part-way through.
with open("cnBeta.html") as url_text:
    for line in url_text:
        url_list = pattern.findall(line)
        # An empty list is falsy, so lines without URLs print nothing.
        if url_list:
            # The parenthesised single-argument form prints the same output
            # under the Python 2 print statement and the Python 3 function.
            print(url_list)